Chelicerate GRAMPA
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(tidyverse)
library(cowplot)
library(ggtree)
library(viridis)
library(here)
source("../lib/design.r")
source("../lib/get_tree_info.r")
########################################readGrampa <- function(spec, type, bs, search, label) {
score_file = here("data", paste0(spec, "spec"), "grampa", paste0(type, "-bs", bs, "-", search, "-scores.txt"))
count_file = here("data", paste0(spec, "spec"), "grampa", paste0(type, "-bs", bs, "-", search, "-dup-counts.txt"))
score_data = read_tsv(score_file) %>% mutate(rank = row_number()) %>% mutate(label = label) %>% mutate(labeled.tree = paste0(labeled.tree, ";"))
count_data = read_tsv(count_file)
# Read the GRAMPA results
names(count_data)[2] = "label"
for(i in levels(as.factor(count_data$mul.tree))) {
if(i != "0"){
h_nodes = c()
for(j in 1:nrow(count_data)){
if(count_data[j,]$mul.tree == i){
if(grepl("*", count_data[j,]$label, fixed=T)) {
h_nodes = c(h_nodes, gsub("\\*", "", count_data[j,]$label))
}
}
}
count_data = count_data %>% mutate(label = ifelse(mul.tree == as.numeric(i) & label %in% h_nodes, paste0(label, "+"), label))
}
}
# This block adds the + to the hybrid clade tips in the dup counts table because GRAMPA doesn't do that...
return(list(score_data, count_data))
}
####################
plotGrampaTrees <- function(data_lowest, dup_counts, title) {
h = corecol(numcol=1, pal="wilke", offset=3)
l = corecol(numcol=1, offset=3)
# Colors
tree_fig_list = list()
tree_info_list = list()
# A list to store tree figures
first = TRUE
for(i in 1:nrow(data_lowest)){
cur_tree_num = data_lowest[i,]$mul.tree
# Get the number of the current tree in the dataset
title_str = paste0("MT-", cur_tree_num)
if(cur_tree_num == 0) {
title_str = "ST"
}
# Get the label for the plot title
tree = read.tree(text=data_lowest[i,]$labeled.tree)
tree_to_df_list = treeToDF(tree)
tree_info = tree_to_df_list[["info"]]
# Read the current tree
cur_dup_counts = dup_counts %>% filter(mul.tree == cur_tree_num)
# Get the duplication counts per branch for this tree
tree_info = inner_join(tree_info, cur_dup_counts, by="label") %>% arrange(node)
# Merge the tree info and dup counts
tree_info$label2 = ifelse(tree_info$node.type == "internal" , tree_info$label, "")
tree_fig = ggtree(tree, size=0.5, ladderize=T, aes(color=dups)) %<+% tree_info +
scale_color_viridis(name='Duplications', option="C", limits=c(0, max(dup_counts$dups)+100)) +
#scale_color_continuous(name='Duplications', low=l, high=h, limits=c(0, max(dup_counts$dups)+100)) +
xlim(0, 15) +
geom_tiplab(size=2.5) +
geom_label(aes(x=branch, label=label2), vjust=-.1, size=1.5, color="#920000", fill="transparent", label.size=NA) +
#geom_nodepoint(color="#666666", alpha=0.85, size=4)
ggtitle(paste(title_str, " (", data_lowest[i,]$score, ")", sep="")) +
theme(legend.position="none")
# Plot the current tree
#print(tree_fig)
if(first){
tree_fig = tree_fig + theme(legend.position = "right")
leg = get_legend(tree_fig)
tree_fig = tree_fig + theme(legend.position = "none")
first = FALSE
}
tree_fig_list[[i]] = tree_fig
# Add the tree to the tree list
}
title = plot_grid(NULL, labels=c(title))
tree_figs_b = plot_grid(plotlist=tree_fig_list, ncol=3)
tree_figs_leg = plot_grid(tree_figs_b, leg, ncol=2, rel_widths=c(1,0.2))
tree_figs = plot_grid(title, tree_figs_leg, nrow=2, rel_heights=c(0.1,1))
return(tree_figs)
}
########################################1 GRAMPA search limited to horseshoe crab and spider/scorpion clades
1.1 Score distributions
Black dots are the singly-labeled species tree.
Large dots are autopolyploid events.
####################
rlist = readGrampa(16, "astral-pro", 90, "hcsp", "ASTRAL-Pro")
pro_hcsp_16 = rlist[[1]]; pro_hcsp_dups_16 = rlist[[2]]
rlist = readGrampa(16, "ballesteros", 90, "hcsp", "Ballesteros")
bal_hcsp_16 = rlist[[1]]; bal_hcsp_dups_16 = rlist[[2]]
rlist = readGrampa(16, "traditional", 90, "hcsp", "Horseshoe crabs sister")
trad_hcsp_16 = rlist[[1]]; trad_hcsp_dups_16 = rlist[[2]]
grampa_data_16 = rbind(pro_hcsp_16, bal_hcsp_16, trad_hcsp_16)
grampa_data_st_16 = grampa_data_16 %>% filter(mul.tree == 0)
grampa_data_auto_16 = grampa_data_16 %>% filter(h1.node == h2.node & mul.tree != 0)
cols = corecol(numcol=3, pal="wilke")
names(cols) = c("ASTRAL-Pro", "Ballesteros", "Horseshoe crabs sister")
####################
scores_fig_16 = ggplot(grampa_data_16, aes(x=rank, y=score, color=label)) +
geom_point(size=2, alpha=0.33, show.legend=F) +
geom_point(data=grampa_data_st_16, aes(x=rank, y=score), size=3, alpha=0.75, color="#333333", fill="#999999") +
geom_point(data=grampa_data_auto_16, aes(x=rank, y=score, color=label), size=5, alpha=0.5) +
scale_x_continuous(limits=c(-10,max(grampa_data_16$rank)+10)) +
scale_color_manual(values=cols) +
xlab("Rank") +
ylab("GRAMPA score") +
bartheme() +
theme(legend.position="bottom")
#print(scores_fig_16)
# Plot scores
####################
####################
rlist = readGrampa(19, "astral-pro", 90, "hcsp", "ASTRAL-Pro")
pro_hcsp_19 = rlist[[1]]; pro_hcsp_dups_19 = rlist[[2]]
rlist = readGrampa(19, "ballesteros", 90, "hcsp", "Ballesteros")
bal_hcsp_19 = rlist[[1]]; bal_hcsp_dups_19 = rlist[[2]]
rlist = readGrampa(19, "traditional", 90, "hcsp", "Horseshoe crabs sister")
trad_hcsp_19 = rlist[[1]]; trad_hcsp_dups_19 = rlist[[2]]
grampa_data_19 = rbind(pro_hcsp_19, bal_hcsp_19, trad_hcsp_19)
grampa_data_st_19 = grampa_data_19 %>% filter(mul.tree == 0)
grampa_data_auto_19 = grampa_data_19 %>% filter(h1.node == h2.node & mul.tree != 0)
####################
scores_fig_19 = ggplot(grampa_data_19, aes(x=rank, y=score, color=label)) +
geom_point(size=2, alpha=0.33, show.legend=F) +
geom_point(data=grampa_data_st_19, aes(x=rank, y=score), size=3, alpha=0.75, color="#333333", fill="#999999") +
geom_point(data=grampa_data_auto_19, aes(x=rank, y=score, color=label), size=5, alpha=0.5) +
scale_x_continuous(limits=c(-10,max(grampa_data_19$rank)+10)) +
scale_color_manual(values=cols) +
xlab("Rank") +
ylab("GRAMPA score") +
bartheme() +
theme(legend.position="bottom")
#print(scores_fig_19)
# Plot scores
####################
leg = get_legend(scores_fig_19)
scores_fig_top = plot_grid(scores_fig_16 + theme(legend.position="none"), scores_fig_19 + theme(legend.position="none"), ncol=2, labels=c("16 species", "19species"))
scores_fig = plot_grid(scores_fig_top, leg, nrow=2, rel_heights=c(1,0.1))
print(scores_fig)1.2 Lowest scoring trees - ASTRAL-Pro
apro_trees_16 = plotGrampaTrees(grampa_data_16 %>% filter(label == "ASTRAL-Pro") %>% slice_head(n=6), pro_hcsp_dups_16, "16 species")
apro_trees_19 = plotGrampaTrees(grampa_data_19 %>% filter(label == "ASTRAL-Pro") %>% slice_head(n=6), pro_hcsp_dups_19, "19 species")
print(apro_trees_16)print(apro_trees_19)# Combine and render figs1.3 Lowest scoring trees - Ballesteros
bal_trees_16 = plotGrampaTrees(grampa_data_16 %>% filter(label == "Ballesteros") %>% slice_head(n=6), bal_hcsp_dups_16, "16 species")
bal_trees_19 = plotGrampaTrees(grampa_data_19 %>% filter(label == "Ballesteros") %>% slice_head(n=6), bal_hcsp_dups_19, "19 species")
print(bal_trees_16)print(bal_trees_19)# Combine and render figs1.4 Lowest scoring trees - Horseshoe crabs sister
trad_trees_16 = plotGrampaTrees(grampa_data_16 %>% filter(label == "Horseshoe crabs sister") %>% slice_head(n=6), trad_hcsp_dups_16, "16 species")
trad_trees_19 = plotGrampaTrees(grampa_data_19 %>% filter(label == "Horseshoe crabs sister") %>% slice_head(n=6), trad_hcsp_dups_19, "19 species")
print(trad_trees_16)print(trad_trees_19)# Combine and render figs2 Full GRAMPA search
2.1 Score distributions
Black dots are the singly-labeled species tree.
Large dots are autopolyploid events.
####################
rlist = readGrampa(16, "astral-pro", 90, "full", "ASTRAL-Pro")
pro_full_16 = rlist[[1]]; pro_full_dups_16 = rlist[[2]]
rlist = readGrampa(16, "ballesteros", 90, "full", "Ballesteros")
bal_full_16 = rlist[[1]]; bal_full_dups_16 = rlist[[2]]
rlist = readGrampa(16, "traditional", 90, "full", "Horseshoe crabs sister")
trad_full_16 = rlist[[1]]; trad_full_dups_16 = rlist[[2]]
grampa_data_16 = rbind(pro_full_16, bal_full_16, trad_full_16)
grampa_data_st_16 = grampa_data_16 %>% filter(mul.tree == 0)
grampa_data_auto_16 = grampa_data_16 %>% filter(h1.node == h2.node & mul.tree != 0)
####################
scores_fig_16 = ggplot(grampa_data_16, aes(x=rank, y=score, color=label)) +
geom_point(size=2, alpha=0.33, show.legend=F) +
geom_point(data=grampa_data_st_16, aes(x=rank, y=score), size=3, alpha=0.75, color="#333333", fill="#999999") +
geom_point(data=grampa_data_auto_16, aes(x=rank, y=score, color=label), size=5, alpha=0.5) +
scale_x_continuous(limits=c(-10,max(grampa_data_16$rank)+10)) +
scale_color_manual(values=cols) +
xlab("Rank") +
ylab("GRAMPA score") +
bartheme() +
theme(legend.position="bottom")
#print(cur_scores_fig)
#print(cds_apro_r70_full_scores_fig)
# Plot scores
####################
####################
rlist = readGrampa(19, "astral-pro", 90, "full", "ASTRAL-Pro")
pro_full_19 = rlist[[1]]; pro_full_dups_19 = rlist[[2]]
rlist = readGrampa(19, "ballesteros", 90, "full", "Ballesteros")
bal_full_19 = rlist[[1]]; bal_full_dups_19 = rlist[[2]]
rlist = readGrampa(19, "traditional", 90, "full", "Horseshoe crabs sister")
trad_full_19 = rlist[[1]]; trad_full_dups_19 = rlist[[2]]
grampa_data_19 = rbind(pro_full_19, bal_full_19, trad_full_19)
grampa_data_st_19 = grampa_data_19 %>% filter(mul.tree == 0)
grampa_data_auto_19 = grampa_data_19 %>% filter(h1.node == h2.node & mul.tree != 0)
####################
scores_fig_19 = ggplot(grampa_data_19, aes(x=rank, y=score, color=label)) +
geom_point(size=2, alpha=0.33, show.legend=F) +
geom_point(data=grampa_data_st_19, aes(x=rank, y=score), size=3, alpha=0.75, color="#333333", fill="#999999") +
geom_point(data=grampa_data_auto_19, aes(x=rank, y=score, color=label), size=5, alpha=0.5) +
scale_x_continuous(limits=c(-10,max(grampa_data_16$rank)+10)) +
scale_color_manual(values=cols) +
xlab("Rank") +
ylab("GRAMPA score") +
bartheme() +
theme(legend.position="bottom")
#print(cur_scores_fig)
#print(cds_apro_r70_full_scores_fig)
# Plot scores
####################
leg = get_legend(scores_fig_19)
scores_fig_top = plot_grid(scores_fig_16 + theme(legend.position="none"), scores_fig_19 + theme(legend.position="none"), ncol=2, labels=c("16 species", "19species"))
scores_fig = plot_grid(scores_fig_top, leg, nrow=2, rel_heights=c(1,0.1))
print(scores_fig)2.2 Lowest scoring trees - ASTRAL-Pro
apro_trees_16 = plotGrampaTrees(grampa_data_16 %>% filter(label == "ASTRAL-Pro") %>% slice_head(n=6), pro_full_dups_16, "16 species")
apro_trees_19 = plotGrampaTrees(grampa_data_19 %>% filter(label == "ASTRAL-Pro") %>% slice_head(n=6), pro_full_dups_19, "19 species")
print(apro_trees_16)print(apro_trees_19)# Combine and render figs2.3 Lowest scoring trees - Ballesteros
bal_trees_16 = plotGrampaTrees(grampa_data_16 %>% filter(label == "Ballesteros") %>% slice_head(n=6), bal_full_dups_16, "16 species")
bal_trees_19 = plotGrampaTrees(grampa_data_19 %>% filter(label == "Ballesteros") %>% slice_head(n=6), bal_full_dups_19, "19 species")
print(bal_trees_16)print(bal_trees_19)# Combine and render figs2.4 Lowest scoring trees - Horseshoe crabs sister
trad_trees_16 = plotGrampaTrees(grampa_data_16 %>% filter(label == "Horseshoe crabs sister") %>% slice_head(n=6), trad_full_dups_16, "16 species")
trad_trees_19 = plotGrampaTrees(grampa_data_19 %>% filter(label == "Horseshoe crabs sister") %>% slice_head(n=6), trad_full_dups_19, "19 species")
print(trad_trees_16)print(trad_trees_19)# Combine and render figs